/*

05_risk_binscatters.do

Purpose: create figure comparing simulated and real application risk
Inputs: student-level-panel
Outputs: binscatter_sim_vs_real_risk
	
*/

use "$int/student-level-panel", clear

// Keep rows flagged simulator == 1 that have a non-missing realized initial risk.
// NOTE(review): the earlier comment described this as "only rct eligible grades";
// presumably simulator == 1 encodes that restriction -- confirm against the panel build.
keep if simulator == 1 & !mi(risk_real_initial)

// Convert both risk measures into placement probabilities (1 - risk).
gen app_place_prob = 1 - app_risk
gen real_initial_place_prob = 1 - risk_real_initial
label var app_place_prob "Predicted placement probability"
label var real_initial_place_prob "True placement probability"

// Bin the true placement probability into width-0.1 intervals [0,.1), ..., [1,1.1).
// egen cut() with at() stores each bin's LOWER bound, so real_probq is used only
// as a group identifier below, never as a plotting coordinate.
egen real_probq = cut(real_initial_place_prob), at(0.00(.1)1.1)

// Values outside [0, 1.1) get a missing bin id; drop them so they cannot form
// a stray group in the collapsed data.
drop if mi(real_probq)

// Per bin: mean / p25 / p75 of the predicted probability, plus the mean of the
// true probability. The within-bin mean is the x-coordinate of each point, so
// points are compared to the 45-degree line at the bin's actual location rather
// than at its left edge (which would shift every point left by up to 0.1).
// collapse replaces variable labels, hence the explicit axis titles in the plot.
collapse (mean) app_place_prob real_place_prob_mean=real_initial_place_prob ///
	(p25) app_place_prob25=app_place_prob ///
	(p75) app_place_prob75=app_place_prob, by(real_probq)

// Plot 1: bin means; plot 2: 45-degree reference line; plot 3: interquartile band.
// The legend keys refer to plots 2 and 3 by their position in this list.
twoway (scatter app_place_prob real_place_prob_mean,  ///
	msymbol(O) mfcol(ebblue%80) mlcol(ebblue) msize(large) mcol(ebblue) ) ///
	(function y = x, range(0 1) lcol(gray) lpat(solid) lwid(medthick)) ///
	(rarea app_place_prob25 app_place_prob75 real_place_prob_mean, sort col(ebblue%25)) ///
	, ///
	scheme(s1color) name(g1, replace) ///
	xtitle(True placement probability) ytitle(Predicted placement probability) ///
	legend(row(1) order(2 "True" 3 "Interquartile Range")) ///
	ysc(range(0 1)) ylab(0(0.2)1)

graph export "$figures/binscatter_sim_vs_real_risk.png", width(3200) replace